######################################################################
#Replication file for "The Impact of China's AIIB on the World Bank"
#Authors: Jing Qian, James Vreeland, Jianzhi Zhao
#Codes to replicate Table 1, Figure 1 - Figure 3 in the main text
######################################################################
rm(list = ls())

#To install the bpCausal package (ver 0.0.1)
# install.packages('devtools', repos = 'http://cran.us.r-project.org') # if not already installed
# devtools::install_github('liulch/bpCausal')

#Load bpCausal package
library(bpCausal)

#R Version used: 4.0.2 (2020-06-22)
#Note: Results from bpCausal might differ (~0.1% level), depending on R version
#With no impact to the results and conclusions

#Other packages used
library(tidyverse)
library(RColorBrewer)

#Setwd
setwd("C:/Users/qianj/Dropbox (Princeton)/AIIB_WB_Replication")

###################################
#Figure 1
###################################
#-------------
#(1) Load data
#-------------
load("data/data_main.RData")

#-------------
#(2) Prepare data
#-------------
#By-group averages
wb.avg.founder = df.main %>%
  subset(., aiib_founder == 1) %>%
  group_by(year) %>%
  summarise(avg_hard_project_count = mean(hard_project_count_all))
wb.avg.nonfounder = df.main %>%
  subset(., aiib_founder == 0) %>%
  group_by(year) %>%
  summarise(avg_hard_project_count = mean(hard_project_count_all))

#Difference
wb.diff = (wb.avg.founder[, -1] - wb.avg.nonfounder[, -1]) %>%
  cbind(wb.avg.founder$year, .)
colnames(wb.diff)[1] = "year"


#-------------
#(3) Plot
#-------------
#Initialization
cex = 3
colors = brewer.pal(n = 3,
                    name = "Dark2")

#Setup
png(filename = "figure/Figure_1.png",
    height = 800,
    width = 2400)

par(mar = c(2, 5, 5, 1),
    mfcol = c(1, 2))

#--------------------------
#Left: Raw data of average project counts
plot(x = wb.avg.founder$year,
     y = wb.avg.founder$avg_hard_project_count,
     type = "l",
     ylim = c(0, 6),
     xaxt = "n",
     ylab = "Project Count",
     xlab = "",
     cex = cex,
     cex.axis = cex,
     cex.lab = cex,
     cex.main = cex,
     lwd = cex,
     col = colors[2],
     main = "Average New World Bank Infrastructure Projects")

#Non-founder
lines(x = wb.avg.nonfounder$year,
      y = wb.avg.nonfounder$avg_hard_project_count,
      lwd = cex,
      lty = "dashed",
      col = colors[3])

#Axis ticks
axis(side = 1,
     at = seq(1992, 2018, 2),
     tck = 0.015,
     cex.axis = 2)

#Ablines
abline(v = 2016,
       lwd = cex,
       lty = "dashed",
       col = "gray")

#Legend
legend("topleft",
       legend = c("AIIB Founders",
                  "Other Countries"),
       col = colors[2:3],
       lty = c("solid", "dashed"),
       lwd = cex,
       bty = "n",
       cex = cex)

#--------------------------
#Right: Only the difference
plot(x = wb.diff$year,
     y = wb.diff$avg_hard_project_count,
     type = "l",
     ylim = c(0, 3),
     lwd = cex,
     cex = cex,
     cex.axis = cex,
     cex.lab = cex,
     cex.main = cex,
     xaxt = "n",
     yaxt = "n",
     xlab = "",
     ylab = "Project Count",
     main = "Difference in New World Bank Infrastructure Projects")

#Axis ticks
axis(side = 1,
     at = seq(1992, 2019, 2),
     tck = 0.015,
     cex.axis = 2)
axis(side = 2,
     at = seq(0, 3, 1),
     cex.axis = cex)

#Ablines
abline(v = 2016,
       lwd = cex,
       lty = "dashed",
       col = "gray")

#Legend
legend("topleft",
       legend = c("Difference"),
       cex = cex,
       lwd = cex,
       bty = "n")

#--------------------------
#Close
dev.off()


###################################
#Table 1, Column (1)
#~1.4 minutes
###################################
#-------------
#(1) Initialization
#-------------
load("data/data_main.RData") #Load data
Y.use = "hard_project_count_all" #Outcome variable name
D.use = "aiib_founder_2016" #Treatment variable name
index.use = c("iso2c", "year") #Index for unit and time

#-------------
#(2) Estimation
#-------------
set.seed(1234)
bpcausal.tab1a = bpCausal(data = df.main,
                          Yname = Y.use,
                          Dname = D.use,
                          Xname = c(),
                          Zname = c(),
                          Aname = c(),
                          index = index.use,
                          re = "both",
                          ar1 = TRUE,
                          r = 10,
                          niter = 15000,
                          burn = 5000,
                          xlasso = 1,
                          zlasso = 1,
                          alasso = 1,
                          flasso = 1,
                          a1 = 0.001,
                          a2 = 0.001,
                          b1 = 0.001,
                          b2 = 0.001,
                          c1 = 0.001,
                          c2 = 0.001,
                          p1 = 0.001,
                          p2 = 0.001)

#-------------
#(3) Get estimated coefficients and intervals
#-------------
att.tab1.1 = effSummary(bpcausal.tab1a)
att.tab1.1$est.avg #Estimated ATT as reported in Table 1, Column (1)
att.tab1.1$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table 1

#-------------
#(4) Number of observations
#-------------
df.use.complete = df.main[, c(Y.use, D.use, index.use)] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#-------------
#(5) Treated and Control Units
#-------------
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units


###################################
#Table 1, Column (2) 
#~2.8 minutes
###################################
#-------------
#(1) Initialization
#-------------
load("data/data_main.RData") #Load data
Y.use = "hard_project_count_all" #Outcome variable name
D.use = "aiib_founder_2016" #Treatment variable name
index.use = c("iso2c", "year") #Index for unit and time
#Covariates
covs.all = c("gdppc_log_lag",
             "population_log_lag",
             "election_either_lag",
             "fdi_gdp_lag",
             "debt_gni_lag",
             "oda_gni_lag",
             "polity2_lag",
             "unsc_lag",
             "IdealPointDistance_lag")

#-------------
#(2) Estimation
#-------------
set.seed(1234)
bpcausal.tab1b = bpCausal(data = df.main,
                          Yname = "hard_project_count_all",
                          Dname = "aiib_founder_2016",
                          Xname = covs.all,
                          Zname = covs.all,
                          Aname = covs.all,
                          index = c("iso2c", "year"),
                          re = "both",
                          ar1 = TRUE,
                          r = 10,
                          niter = 15000,
                          burn = 5000,
                          xlasso = 1,
                          zlasso = 1,
                          alasso = 1,
                          flasso = 1,
                          a1 = 0.001,
                          a2 = 0.001,
                          b1 = 0.001,
                          b2 = 0.001,
                          c1 = 0.001,
                          c2 = 0.001,
                          p1 = 0.001,
                          p2 = 0.001)

#-------------
#(3) Get estimated coefficients and intervals
#-------------
att.tab1.2 = effSummary(bpcausal.tab1b)
att.tab1.2$est.avg #Estimated ATT as reported in Table 1, Column (1)
att.tab1.2$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table 1

coef.tab1.2 = coefSummary(bpcausal.tab1b)
coef.tab1.2$est.beta[-1,] #Estimated invariant component of covariate coefficients (ignore the intercept in first row)
coef.tab1.2$est.beta[-1,] %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table 1

#-------------
#(4) Number of observations
#-------------
df.use.complete = df.main[, c(Y.use, D.use, covs.all, index.use)] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#-------------
#(5) Treated and Control Units
#-------------
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units

#-------------
#(6) Save result for producing figures in the appendix
#Note: Requires at least 250 MB of disk space
#-------------
result.original = bpcausal.tab1b
save(result.original,
     file = "result/bpcausal_original.RData")


###################################
#Figure 2
###################################
#-------------
#(1) Parameters
#-------------
#Load results: Column (2) of Table 1
load("result/bpcausal_original.RData")
result.use = effSummary(result.original)

#Values
y.tr = result.use$est.eff$observed
y.cf = result.use$est.eff$estimated_counterfactual

#Titles
xtitle = ""
ytitle = "New World Bank infrastructure Projects"

#Limits
ylim = c(0, 6)
newx = 1992:2019
t.time = 2016

#Cex
cex = 2


#-------------
#(2) Plot
#-------------
#Setup
png(filename = "figure/Figure_2.png",
    height = 800,
    width = 1200)
par(mar = c(2, 5, 1, 1))

#-----------
#Plot
#Empty plot
plot(1,
     type = "n",
     ylim = ylim,
     xlim = range(newx),
     xlab = xtitle,
     xaxt = "n",
     ylab = ytitle,
     cex = cex,
     cex.lab = cex,
     cex.axis = cex)

#Observed
lines(x = newx,
      y = y.tr,
      lwd = cex)

#Estimated
lines(x = newx,
      y = y.cf,
      lwd = cex,
      lty = "dashed")

#Abline
abline(v = t.time,
       lwd = cex,
       lty = "dashed",
       col = "grey")

#X-axis text
axis(side = 1,
     at = seq(min(newx),
              max(newx),
              2),
     cex.axis = cex)

#Legend
legend("topleft",
       legend = c("Treated Average",
                  "Estimated Y(0) Average for the Treated"),
       lty = c(1, 2),
       cex = cex,
       bty = "n")

#Close
dev.off()

###################################
#Figure 3
###################################
#-------------
#(1) Parameters
#-------------
#Load results: Column (2) of Table 1
load("result/bpcausal_original.RData")
result.use = effSummary(result.original)

#Titles
xtitle = ""
ytext = "Effect on New World Bank Infrastructure Projects"

#Limits
ylim = c(-3, 3)
newx = 1992:2019
t.time = 2016

#Cex
cex = 2

#-------------
#(2) Plot
#-------------
#Setup
png(filename = "figure/Figure_3.png",
    height = 800,
    width = 1200)
par(mar = c(2, 5, 5, 1),
    lend = 1)

#----------
#Plot empty graph
#----------
plot(1,
     type = "n",
     xlab = "",
     ylab = "",
     axes = F,
     xlim = range(newx),
     ylim = ylim)
box()

title(main = "World Bank Infrastructure Projects",
      line = 1,
      cex.main = cex)

#-----------------------
#Add axis
#-----------------------
#X-axis labels
axis(side = 1, 
     at = seq(min(newx),
              max(newx),
              by = 2),
     cex.axis = cex)
#X-axis title
mtext(text = xtitle, 
      side = 1, 
      line = 2.5,
      cex = cex)
#Y-axis
axis(side = 2,
     at = seq(min(ylim),
              max(ylim),
              by = 1),
     cex.axis = cex)
#Y-axis title
mtext(text = ytext,
      side = 2,
      line = 2.5, 
      cex = cex)

#-----------------------
#Lines and shades
#-----------------------
#Ablines
abline(v = t.time,
       col = "gray",
       lty = 2,
       lwd = cex)
abline(h = 0,
       col = "gray20",
       lty = 2,
       lwd = cex)

#ATTs
lines(x = newx,
      y = result.use$est.eff$estimated_ATT,
      col = 1,
      lty = 1,
      lwd = cex)
#CI
polygon(x = c(rev(newx), 
              newx),
        y = c(rev(result.use$est.eff$estimated_ATT_ci_l), 
              result.use$est.eff$estimated_ATT_ci_u),
        col = "#55555530",
        border = NA)
#Legend
legend("topleft",
       legend = c("Estimated ATT",
                  "95% Credible Intervals"),
       cex = cex,
       seg.len = 2,
       col = c(1, "#55555530"),
       lty = c(1, 5),
       lwd = c(2, 20),
       bty = "n")

#Close
dev.off()

